% EXPLAINER: 

%% generates the following:
% Table 2: Structural Preference Parameters
% Results from Table 2 then form basis for Figure 3: Preferences over a Daughteres Age and Education at Marriage
% Table 3: Structural Belief Parameters
% Results from Table 3 then generate Figure 4: Probability of high quality marriage offer
% Figure A.3: Histogram of groom quality
% File also generates the choice probabilities (and boostraps thereof) that are used in stata to generate our predicted trajectories (Figure 5, Figure 6) 

%% File is structured as follows: 
% 0) import data inc set specification for preferences
% 1) Estimate preferences
% 1a) Figure A3: empirical histogram of groom quality (in ex-ante data)
% 2) Bootstrap preferences (Table 2 generated here)
% 3) Estimate overall beliefs (figure 4 generated here)
% 4) export choice probabilities & beliefs 
% 5) bootstrap beliefs (Table 3 generated here)
% 6) Export choice probabilites for each bootstrap (used in stata to
% generate F5 and F6)

%% Note on reproducability. 
% It is a known issue that MATLAB optimization routines will not always reach the 
% exact same solutions between different systems with different OS versions, 
% CPUs, BIOS settings (see
% https://uk.mathworks.com/matlabcentral/answers/2144809-why-do-i-receive-different-results-from-two-separate-runs-of-the-same-matlab-operation)

% In practice we have found that our preference estimates, and bootstraps
% thereof, created in sections 1 and 2 of this file differ between machines
% beginning around the 6th decimal place. As such, these slight differences
% never affect the preference results to the level of precision as reported
% in the paper. We have found slightly larger differences between machines
% in the bootstraps of the belief estimates. In our runs on different 
% machines, we have found differences in estimated standard errors in the
% belief estimates occasionally show up at our 3rd significant figure (but
% generally are confined to later figures that do not affect the results to
% the precision reported in the paper). We haven't found any sensitivity of
% the main estimates between machines. Digging further, we have found that
% the main cause of these differences in the bootstrapped belief estimates 
% is the (very small) differences in the preference estimates which then 
% get passed to the belief estimation. 

% All analysis presented in this paper was created on a Dell laptop with the
% following characteristics: 

    % OS Name	Microsoft Windows 11 Pro
    % Version	10.0.22621 Build 22621
    % OS Manufacturer	Microsoft Corporation
    % System Manufacturer	Dell Inc.
    % System Model	XPS 15 9520
    % System Type	x64-based PC
    % System SKU	0B19
    % Processor	12th Gen Intel(R) Core(TM) i9-12900HK, 2500 Mhz, 14 Core(s), 20 Logical Processor(s)
    % BIOS Version/Date	Dell Inc. 1.29.0, 11/12/2024

% In our replication package, we include all the output generated in this
% analysis in the "Output" folder. In particular "Output/bs_results"
% contains the preference and belief parameter estimates at every bootstrap. 
% Files in this replication package write to the "Output_New" folder which 
% is empty. When preference estimates are read back in to estimate beliefs,
% they are read in from the "Output" folder (i.e. the original estimates 
% used in the paper". This could be altered to the "Output_New" folder if
% the user wants to use newly-generated estimates.

%% 0) import data inc set specification for preferences
clear; clc;
pref_spec=2; % run for 0, 1, 2 ( 2 is preferred specification and the one we carry through to belief estimation)
close all;
rng(88);

% import bootstrap weights
%---------------------------
w_expost = csvread('../Data/created_data/xp_w.csv', 0,0);
w_exante = csvread('../Data/created_data/xa_w.csv', 0,0);

% import data - ex ante
%-----------------------------
[xadata] = import_and_setup_exante;

% import and setup expost data
%-----------------------------
[xpdata,n_prefparms,n_groomchars] = import_and_setup_expost(pref_spec);

% set up states
% columns are: age, completed education, in school, current education (if
% in school)
%-----------------------------
setup_states;

%% 1) Estimate preferences
% set starting values 
%--------------------
% errors
ST_sigma_ep=2;% combined error

beta_start=0.1*ones(n_prefparms,1)'; % pref parms
p_starting_vals(1)=ST_sigma_ep;
i=1+size(beta_start,2);
p_starting_vals(2:i)=beta_start;
p_starting_vals(i+1)=0.2; % inattention share
lb=-10.*ones(size(p_starting_vals))
ub=Inf.*ones(size(p_starting_vals))
lb(end)=0
ub(end)=0.5
w=ones(size(xpdata.EP_choice));

% estimate preferences
options_f = optimoptions('fmincon', 'UseParallel', false, 'MaxIterations', 1e6, 'CheckGradients', false, 'MaxFunEvals', 1000000, ...
    'SpecifyObjectiveGradient', false, 'OptimalityTolerance', 1e-6, 'SpecifyConstraintGradient',false, 'FiniteDifferenceType',  'forward');

 [pref_estimates_real,ll,~,~,lambda,grad,H] = fmincon(@(alpha0) pref_mom(xpdata,n_prefparms,alpha0,w), ...
                    p_starting_vals, [], [], [], [],lb,ub, [], options_f);
 pref_estimates_real' 

 estimates.pref=pref_estimates_real
 estimates.prefs=pref_estimates_real

%% 1a) Figure A3: empirical histogram of groom quality (in ex-ante data)
beta_groom=[1 pref_estimates_real(2:7)]
size(xadata.G_exante)
size(beta_groom)
groom_qual=xadata.G_exante.*beta_groom;

groom_qual=sum(groom_qual,2);
good_groom=xadata.G_exante(:,1);

%
histogram(groom_qual)
xlabel("Groom Quality (Continuous)", 'FontSize', 14)
ylabel("Frequency in Ex-Ante Experiment", 'FontSize', 14)
saveas(gcf,'../Output_New/groom_quality.png')

%% 2) Bootstrap preferences
N=500;
bs_prefs=ones(size(p_starting_vals',1),N);

parfor n=0:N 
                n
                w=w_expost(:,n+1);
                [temp,ll,~,~,lambda,grad,H] = fmincon(@(alpha0) pref_mom(xpdata,n_prefparms,alpha0,w), ...
                            p_starting_vals, [], [], [], [],lb,ub, [], options_f);
                bs_prefs(:,n+1)=[temp'];
end

se= std(bs_prefs(:,2:end),0,2);       
real=bs_prefs(:,1);
t=real./se;

pref_results=[real se t]
colNames = {'coef','se','t'}
sTable = array2table(pref_results,'VariableNames',colNames) 

% p - college vs. high school (on spec 0 and 1)
    % bs_diff=bs_prefs(end-1,:)-bs_prefs(end-2,:)
    % se=std(bs_diff(:,2:end))
    % bs_dist=bs_diff(:,2:end)-mean(bs_diff(:,2:end),'all')
    % mean(abs(bs_diff(:,1))<abs(bs_dist))
    
% TABLE 2
%--------------------------------------------------------------------------
hello= sprintf('../Output_New/table_prefs_spec%d.xlsx',pref_spec);
writetable(sTable,hello)


% save bootstraps if spec==2
if pref_spec==2
    save('../Output_New/bs_results/bs_prefs','bs_prefs')
end

 %% 3) Estimate overall beliefs

 matvalues= ... % starting, lb, ub
    [   0.8,    0.1,    2; ...  % sigma - constant
        0,      0,      0; ...  % offer probability - het in age (for robustness only)
        -3,     -8,     1; ...  % beliefs - constant
        0,      -1,     1; ...  % beliefs - age
        1.2,    -2,     3; ...  % beliefs - ed
        0,      0,      0; ...  % beliefs - ed^2  (for robustness only) 
        2,      -1,     5; ...  % beliefs - college
        -0.1,   -0.5,   0.5; ... % beliefs - age x ed
        -0.2,   -0.5,   0.5; ... % beliefs - bad girl multiplier
        0.3,    0,      0.5; ... % gamma
        -5,     -16,    -1; ...  % VTp1 
        1,      1,     1; ...    % weight on u0 in VTp1
        pref_estimates_real(end),  pref_estimates_real(end),   pref_estimates_real(end)]    % inattentive share
%
starting_vals=matvalues(:,1)';
lb=matvalues(:,2)';
ub=matvalues(:,3)';

starting_vals=matvalues(:,1)';
lb=matvalues(:,2)';
ub=matvalues(:,3)';

n_beliefparms=7

w=ones(size(xadata.chosen_marry));

fixedparms.n_prefparms=n_prefparms
fixedparms.n_beliefparms=n_beliefparms
fixedparms.n_groomchars=n_groomchars
fixedparms.ind_goodgirl=1
fixedparms.ind_likeschool=1
fixedparms.prefparms=pref_estimates_real
fixedparms.disc=0.95;

spec=2; % preference spec

% check ll at the starting values
[total_ll] = aggregate_beliefs(states,xadata,fixedparms,spec,w,ub) 

rng(1);

fun=@(alpha0) aggregate_beliefs(states,xadata,fixedparms,spec,w,alpha0)
nvars=size(lb,2)
options = optimoptions('particleswarm','InitialSwarmMatrix',starting_vals,'UseParallel',true,'OutputFcn',{@pigraph},'FunctionTolerance',1e-4,'SwarmSize',100,'HybridFcn',@patternsearch,'Display','iter')
[x,f] = particleswarm(fun,nvars,lb,ub,options)

realx=x;
realf=f;

estimates.beliefs=x

save('../Output_New/main_estimates','estimates')

% FIGURE 4: Probability of high quality marriage offer 
%--------------------------------------------------------------------------
close all
load('../Output_New/main_estimates')
nicegraph(estimates.beliefs)
saveas(gcf,'../Output_New/prob_highoffer.png')

close all
% good girl only
nicegraph2(estimates.beliefs)
saveas(gcf,'../Output_New/beliefs_good.png')


%% 4) export choice probabilities & Beliefs 
load('../Output/main_estimates')

for ind_goodgirl=0
    for ind_likeschool=0
         prefparms=estimates.prefs; %prefparms(:,1)';
        xadata.w=ones(size(xadata,1),1);
          
       	bparms=estimates.beliefs; 

        fixedparms.ind_likeschool=ind_likeschool;
        fixedparms.ind_goodgirl=ind_goodgirl;

        [~,pM,pS,pS_2w,pi_global,V,Vtp1] =ex_ante_mom(states,xadata,fixedparms,spec,bparms);
        output=[states pM pS pS_2w];

        colNames = {'age','ed','inschool','current_sch','PrMarry_low','PrMarry_high','PrSch_low','PrSch_high','PrSch_nooffer'};
        sTable = array2table(output,'VariableNames',colNames)
        hello= sprintf('../Output_New/choiceprobs_like%d_good%d.xlsx',ind_likeschool,ind_goodgirl);
        writetable(sTable,hello);
    end

        output=[states pi_global];
        colNames = {'age','ed','inschool','current_sch','Pr_LowOffer','Pr_HighOffer'};
        sTable = array2table(output,'VariableNames',colNames)
        hello= sprintf('../Output_New/beliefs_good%d.xlsx',ind_goodgirl);
        writetable(sTable,hello)

end

%% 5) bootstrap beliefs
% reload preference matrices
temp=load('../Output/bs_results/bs_prefs.mat');
bs_prefs=temp.bs_prefs

options = optimoptions('particleswarm','InitialSwarmMatrix',starting_vals,'UseParallel',true,'FunctionTolerance',1e-4,'OutputFcn',{@pigraph},'SwarmSize',100,'HybridFcn',@patternsearch,'Display','iter')
nvars=size(lb,2)

N=100
bs=ones(size(starting_vals',1),N);

for n=0:N
    n
    w=w_exante(:,n+1);
    prefs_bs=bs_prefs(:,n+1)';
    fixedparms.prefparms=prefs_bs;

    rng(1);
    fun_bs=@(alpha0) aggregate_beliefs(states,xadata,fixedparms,spec,w,alpha0)
    [temp,f] = particleswarm(fun_bs,nvars,lb,ub,options)
    bs(:,n+1)=temp';
    end 

se= std(bs(:,2:end),0,2);       
real=bs(:,1);
t=real./se;

% TABLE 3: Structural Belief Parameters
%--------------------------------------------------------------------------
belief_results=[real se t]
colNames = {'coef','se','t'}
sTable = array2table(belief_results,'VariableNames',colNames) 

writetable(sTable,'../Output_New/table_beliefs.xlsx') 

save('../Output_New/bs_results/bs_beliefs','bs')

%% 6) export choice probabilities for each bootstrap
clear; clc;
pref_spec=2; % run for 0, 1, 2 ( 2 is preferred specification and the one we carry through to belief estimation)
close all;
rng(88);

% import data - ex ante
%-----------------------------
[xadata] = import_and_setup_exante;

% set up states
% columns are: age, completed education, in school, current education (if
% in school)
%-----------------------------
setup_states;

% load in bootstrapped beliefs
temp=load('../Output/bs_results/bs_beliefs.mat')
bs_beliefs=temp.bs;

% load in bootstrapped preferences
temp=load('../Output/bs_results/bs_prefs.mat')
bs_prefs=temp.bs_prefs;

spec=2;

fixedparms.n_prefparms=11
fixedparms.n_beliefparms=7
fixedparms.n_groomchars=7
fixedparms.ind_goodgirl=1
fixedparms.ind_likeschool=1
fixedparms.disc=0.95;


for b=1:101
for ind_goodgirl=0:1
    for ind_likeschool=0:1
         prefparms=bs_prefs(:,b)';
        fixedparms.prefparms=prefparms;
        xadata.w=ones(size(xadata,1),1);
          
       	bparms=bs_beliefs(:,b); 
        fixedparms.ind_likeschool=ind_likeschool;
        fixedparms.ind_goodgirl=ind_goodgirl;
        [~,pM,pS,pS_2w,pi,V,Vtp1] = ex_ante_mom(states,xadata,fixedparms,spec,bparms) ;
        
        output=[states pM pS pS_2w];

        colNames = {'age','ed','inschool','current_sch','PrMarry_low','PrMarry_high','PrSch_low','PrSch_high','PrSch_nooffer'};
        sTable = array2table(output,'VariableNames',colNames);
        hello= sprintf('../Output_New/bs_results/choiceprobs_bootstraps/choiceprobs_like%d_good%d_bs%d.xlsx',ind_likeschool,ind_goodgirl,b-1);
        writetable(sTable,hello);

    end

       output=[states pi];
       colNames = {'age','ed','inschool','current_sch','Pr_LowOffer','Pr_HighOffer'};
       sTable = array2table(output,'VariableNames',colNames);
       hello= sprintf('../Output_New/bs_results/choiceprobs_bootstraps/beliefs_good%d_bs%d.xlsx',ind_goodgirl,b-1);
       writetable(sTable,hello);

end
end